/*******************************************************************************

Name: Figure 1 - health based water quality violations by year

Authors: Jonathan Baker, Lori Bennear, Sheila Olmstead

Input files:
	Primary panel: natlCWS_yr_Panel_mod-small_v2-Match.dta
	number of MCLs per year: number_mcl.dta
		
*******************************************************************************/


* ========================== START START START ==========================


clear
set more off
set graphics off

* ______________________________________________________________________________
* set paths and create log file
global root "C:/Users/jbaker/Documents/Research/SDWIS/Analysis/STATA"
global input "$root/Input"
global intermediate "$root/Intermediate"
global log "$root/LogFiles"
global output "$root/Output"
global outpanel "$root/OutputPanel"
global figures "$root/Figures"

log using "$log/Figure1_violationsByYear.log", replace name(visual)

* ============================================================================== 
* Figure 1
local panelid "501 10k 100k"
foreach pid of local panelid {
use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear

keep if id2001 == 1 & mflag`pid'==1
tab T_`pid' T_`pid'_res
sum pop if T_`pid'_res == 0
sum pop if T_`pid'_res == 1

* ------------------------------------------------------------------------------
* calculate number of utilities in each population category
bysort year: egen temp_cntrl = count(pwsidNUM) if T_`pid'_res == 0
bysort year: egen temp_treat = count(pwsidNUM) if T_`pid'_res == 1
tab temp_cntrl temp_treat, m
replace temp_cntrl = temp_treat if temp_cntrl == .
rename temp_cntrl n_pcat
drop temp_treat
tab n_pcat
tab T_`pid'_res n_pcat
tab year n_pcat


* ------------------------------------------------------------------------------
* collapse data
collapse (sum) MCL_999 MRDL_999 TT_999 MR_999 OTH_999 ALL_999 HLTH_999 (first) n_pcat, by(T_`pid'_res year)

sort year T_`pid'_res
merge m:1 year using "$input/number_mcl.dta"
keep if _merge==3
drop _merge

* ------------------------------------------------------------------------------
* normalize collapsed data by number of utilities, MCL violations, and both


local violtype "ALL HLTH MCL"
foreach type of local violtype {
foreach rule of numlist 999 {

gen `type'_`rule'_byNsys = `type'_`rule'/n_pcat
gen `type'_`rule'_byMCL = `type'_`rule'/number_mcl
gen `type'_`rule'_byNsysMCL = `type'_`rule'/(n_pcat*number_mcl)
}
}
* ------------------------------------------------------------------------------
* reshape the data
keep year T_`pid'_res ALL_* HLTH_* MCL_*
reshape wide ALL_* HLTH_* MCL_*, i(year) j(T_`pid'_res)

* ------------------------------------------------------------------------------
* create plot
if "`pid'" == "501" {
	local legend0 = "service population {&lt} `pid'"
	}
else if "`pid'" == "10k" {
	local legend0 = "501 {&le} service population {&lt} `pid'"
	}
else if "`pid'" == "100k" {
	local legend0 = "10k {&le} service population {&lt} `pid'"
	/* NOTE: if you look at the summation, it looks as if it should be
		 10,001 <= service pop < 100k, but what I am pretty sure is
		 happening is that no utility with 10k pop served as a match
		 for any of the > 100k utilities */
	}
local legend1 = "service population {&ge} `pid'"
local colorscale "black cyan"
local color = "lcolor(`colorscale') mcolor(`colorscale')"

replace HLTH_999_byNsysMCL0 = HLTH_999_byNsysMCL0 * 1000
replace HLTH_999_byNsysMCL1 = HLTH_999_byNsysMCL1 * 1000

// draw graph
/*
gr tw sc HLTH_999_byNsysMCL0 HLTH_999_byNsysMCL1 year, lpattern(dash solid) /// 
 xline(1998, lc(blue)) graphr(color(white)) xla(1990(1)2001) xti("") ///
 ylabel(0(1)4, nogrid) legend(cols(1) pos(6) ring(2) label(1 "`legend0'") ///
 label(2 "`legend1'") region(color(white))) c(l l) `symbol' `color' ///
 ytitle("New violations per thousand systems per MCL")
graph export "$outfig/DIDid2001-`pid'-HLTH-999-NsysMCL-Full.pdf", as(pdf) replace
*/
gr tw sc HLTH_999_byNsysMCL0 HLTH_999_byNsysMCL1 year, lpattern(dash solid) /// 
 xline(1998, lc(blue)) graphr(color(white)) xla(1990(1)2001) xti("") ///
 ylabel(0(1)4, nogrid) legend(cols(2) pos(6) ring(2) label(1 "`legend0'") ///
 label(2 "`legend1'") region(color(white))) c(l l) `symbol' `color' ///
 ytitle("New violations" "per thousand" "systems per MCL") saving("$output/figure`pid'", replace)

}
// export graph
graph combine "$output/figure501.gph" "$output/figure10k.gph" ///
"$output/figure100k.gph", rows(3) cols(1) iscale(.7)
graph export "$figures/DIDid2001-3panel-HLTH-999-NsysMCL-Full.pdf", as(pdf) replace

clear
* ========================== DONE DONE DONE ==========================
log close visual